# Launch a vLLM OpenAI-compatible server for the Qwen3-30B-A3B-AWQ weights
# stored under /home/weights, listening on 0.0.0.0:8000 and exposed to
# clients under the model name "qwen3-30".

# Very large timeouts so long-running requests are not aborted.
# NOTE(review): the _S suffix suggests seconds (36000 s = 10 h); the RPC
# timeout is presumably in milliseconds (36000000 ms = 10 h) — confirm both
# against the vLLM environment-variable documentation for the installed version.
export VLLM_ENGINE_ITERATION_TIMEOUT_S=36000
export VLLM_RPC_TIMEOUT=36000000
# NOTE(review): meaning of this switch is not visible here; it may be specific
# to the vLLM build in use — confirm before changing.
export VLLM_ENFORCE_CUDA_GRAPH=0

# Context length of 32Ki tokens. Uses POSIX $((...)) arithmetic rather than
# the obsolete bash-only $[...] form, so the value is also computed correctly
# when the script is run with a plain POSIX sh.
max_model_len=$((32 * 1024))

# Alternative launch without pipeline parallelism (tensor parallel only):
# vllm serve /home/weights/Qwen3-30B-A3B-AWQ --served_model_name "qwen3-30" -tp 2 --max_model_len $((32*1024)) --enable-reasoning --reasoning-parser qwen3 --disable_log_stats  --trust_remote_code --host 0.0.0.0  --port 8000

# Active launch: -tp 2 combined with -pp 2.
# NOTE(review): this presumably needs 2 x 2 = 4 accelerators — confirm.
vllm serve "/home/weights/Qwen3-30B-A3B-AWQ" \
  --served_model_name "qwen3-30" \
  -tp 2 \
  -pp 2 \
  --max_model_len "${max_model_len}" \
  --enable-reasoning \
  --reasoning-parser qwen3 \
  --disable_log_stats \
  --trust_remote_code \
  --host 0.0.0.0 \
  --port 8000